# Phase 5: 3-Tier Hybrid DPO - Tier 1 & 2 Generation

## The 3-Tier Strategy

| Tier | Type | Count | Method |
|------|------|-------|--------|
| 1 | Hard Pairs | 500 | Active Learning → YOU annotate |
| 2 | Obvious Pairs | 500 | Automated (extreme violations) |
| 3 | Bootstrapped | 500 | After annotation (next notebook) |

**This notebook**: Creates Tier 1 + Tier 2 pairs from your Phase 5 output

In [None]:
# CELL 1: SETUP
import json
import random
import numpy as np
from pathlib import Path
from collections import defaultdict

# Directory the Phase 5 input file is read from, and directory the tier files are written to.
DATA_INPUT = Path('/kaggle/input') / 'gricebench-scientific-fix'
OUTPUT_DIR = Path('/kaggle') / 'working'

# Fixed seed so the random violation choices repeat between runs.
# Only the stdlib `random` generator is seeded here.
random.seed(42)

print('3-Tier Hybrid DPO System')

In [None]:
# CELL 2: LOAD PHASE 5 OUTPUT (Your 1,500 pairs)
# Section banner: a 70-character rule above and below the title.
print('=' * 70, 'LOADING PHASE 5 OUTPUT', '=' * 70, sep='\n')

# The preference pairs generated in Phase 5 are expected inside the input dataset.
pairs_path = DATA_INPUT / 'preference_pairs_1500.json'
if not pairs_path.exists():
    # Report the problem but still define all_pairs, as an empty list.
    print('ERROR: preference_pairs_1500.json not found!')
    print('Please add your Phase 5 output to the dataset')
    all_pairs = []
else:
    with pairs_path.open('r', encoding='utf-8') as f:
        all_pairs = json.load(f)
    print(f'Loaded {len(all_pairs)} preference pairs')

In [None]:
# CELL 3: TIER 1 - ACTIVE LEARNING (Select 500 Hardest Pairs)
# Section banner emitted in one call: a 70-character rule above and below the title.
print('=' * 70, 'TIER 1: ACTIVE LEARNING - SELECTING HARD PAIRS', '=' * 70, sep='\n')

def compute_uncertainty(pair):
    """
    Score how hard a preference pair is to judge, based on surface similarity.

    The score is the mean of two ratios computed from ``response_A`` and
    ``response_B`` (a missing key counts as an empty string):

    * length similarity: ``1 - |len(A) - len(B)| / max(len(A), len(B), 1)``,
      with lengths measured in characters;
    * Jaccard overlap of the lowercased, whitespace-split word sets
      (0 when neither response contains a word).

    Returns a number in [0, 1]; higher means the two responses look more alike.
    """
    text_a = pair.get('response_A', '')
    text_b = pair.get('response_B', '')
    size_a, size_b = len(text_a), len(text_b)

    # The floor of 1 keeps the denominator non-zero when both responses are empty.
    len_similarity = 1 - abs(size_a - size_b) / max(size_a, size_b, 1)

    vocab_a = set(text_a.lower().split())
    vocab_b = set(text_b.lower().split())
    combined = vocab_a | vocab_b
    jaccard = len(vocab_a & vocab_b) / len(combined) if combined else 0

    # Equal-weight average of the two similarity signals.
    return (len_similarity + jaccard) / 2

# Score all pairs: each entry is (pair, uncertainty)
pair_scores = [(pair, compute_uncertainty(pair)) for pair in all_pairs]

# Sort by uncertainty (hardest first)
pair_scores.sort(key=lambda x: x[1], reverse=True)

# Take top 500 hardest (the slice simply returns fewer when fewer pairs were loaded)
tier1_hard_pairs = [p for p, _ in pair_scores[:500]]

print(f'Selected {len(tier1_hard_pairs)} hardest pairs for human annotation')
if tier1_hard_pairs:
    # Index the last *selected* entry rather than a fixed 499, which raised
    # IndexError whenever fewer than 500 pairs were available.
    lowest_selected = pair_scores[len(tier1_hard_pairs) - 1][1]
    print(f'Uncertainty range: {pair_scores[0][1]:.3f} - {lowest_selected:.3f}')
else:
    print('Uncertainty range: n/a (no pairs loaded)')

In [None]:
# CELL 4: TIER 2 - GENERATE OBVIOUS VIOLATION PAIRS
# Section banner: a 70-character rule above and below the title.
print('=' * 70, 'TIER 2: GENERATING OBVIOUS VIOLATION PAIRS', '=' * 70, sep='\n')

# Canned text used to build deliberately bad responses, keyed by violation type.
OBVIOUS_VIOLATIONS = {
    "offtopic": [
        "Let me tell you about my favorite pizza toppings instead.",
        "Speaking of cats, have you ever seen a rainbow?",
        "The weather is nice today. I like blue.",
        "Random thoughts about nothing related to what you asked.",
    ],
    # These start with a space because they are appended to an existing response.
    "contradiction": [
        " Actually, everything I just said is false.",
        " Wait, no. The opposite is true.",
        " But then again, none of this is accurate.",
    ],
    "nonsense": [
        "Asdf jkl qwer uiop zxcv bnm.",
        "The purple elephant danced with quantum cheese.",
        "Blah blah blah blah blah blah blah.",
        "$$%%^^&&**!!@@##",
    ],
    # A single long preamble (ending in a trailing space) that is prepended to a response.
    # The parentheses hold ONE string built from adjacent literals.
    "extreme_verbose": [
        (
            "Let me explain this in EXTREME detail. "
            "First, I want to provide extensive background. "
            "Then, I'll elaborate on every single point. "
            "Additionally, I'll add more context. "
            "Furthermore, there's more to discuss. "
            "Moreover, we should consider all angles. "
            "In conclusion, after all this, "
        ),
    ],
}

def create_obvious_pair(context, good_response, violation_type):
    """Build an auto-labeled pair whose response_B is an obviously bad answer.

    How response_B is produced depends on ``violation_type``:

    * ``'offtopic'`` / ``'nonsense'``: a random canned string of that type;
    * ``'contradiction'``: ``good_response`` followed by a random retraction;
    * ``'extreme_verbose'``: a random preamble, then ``good_response`` twice,
      separated by a space;
    * anything else: the fixed string "I don't know.".

    Every recognised type draws exactly once from the ``random`` module; the
    fallback draws nothing.

    Returns a dict with ``good_response`` as ``response_A``, the bad text as
    ``response_B``, ``preference`` fixed to ``'A_much'``, plus the
    ``violation_type``, ``tier`` (2) and ``auto_labeled`` (True) fields.
    """
    if violation_type in ('offtopic', 'nonsense'):
        # The canned string replaces the response entirely.
        rejected = random.choice(OBVIOUS_VIOLATIONS[violation_type])
    elif violation_type == 'contradiction':
        retraction = random.choice(OBVIOUS_VIOLATIONS['contradiction'])
        rejected = good_response + retraction
    elif violation_type == 'extreme_verbose':
        preamble = random.choice(OBVIOUS_VIOLATIONS['extreme_verbose'])
        rejected = preamble + good_response + " " + good_response
    else:
        rejected = "I don't know."

    labeled_pair = {
        'context': context,
        'response_A': good_response,  # chosen side
        'response_B': rejected,       # rejected side
        'preference': 'A_much',       # label assigned without human review
        'violation_type': violation_type,
        'tier': 2,
        'auto_labeled': True,
    }
    return labeled_pair

# Generate 500 obvious pairs
tier2_obvious_pairs = []
violation_types = ['offtopic', 'contradiction', 'nonsense', 'extreme_verbose']

# Use remaining pairs (not in tier 1) as source
remaining_pairs = [p for p, _ in pair_scores[500:]]

if not remaining_pairs:
    # With 500 or fewer scored pairs nothing is left after the Tier 1 slice; the
    # modulo below would then raise ZeroDivisionError, so skip generation instead.
    print('WARNING: no pairs left after Tier 1 selection - skipping Tier 2 generation')
else:
    for i in range(500):
        # Sources are reused cyclically when fewer than 500 remain.
        source_pair = remaining_pairs[i % len(remaining_pairs)]
        context = source_pair.get('context', '')
        # NOTE(review): assumes response_A is the better response in the Phase 5
        # pairs - confirm against how preference_pairs_1500.json was produced.
        good_response = source_pair.get('response_A', '')  # Use A as good

        # Round-robin over the violation types (length taken from the list itself)
        violation_type = violation_types[i % len(violation_types)]
        obvious_pair = create_obvious_pair(context, good_response, violation_type)
        obvious_pair['id'] = f'tier2_{i}'
        tier2_obvious_pairs.append(obvious_pair)

print(f'Generated {len(tier2_obvious_pairs)} obvious violation pairs')

# Show distribution
type_counts = defaultdict(int)
for p in tier2_obvious_pairs:
    type_counts[p['violation_type']] += 1
print('Distribution:', dict(type_counts))

In [None]:
# CELL 5: PREPARE TIER 1 FOR ANNOTATION
# Section banner: a 70-character rule above and below the title.
print('=' * 70, 'PREPARING TIER 1 FOR ANNOTATION', '=' * 70, sep='\n')

# One annotation record per hard pair: the context and both responses are copied
# over (missing keys become ''), and the label fields start out empty.
tier1_for_annotation = [
    {
        'id': f'tier1_{index}',
        'context': hard_pair.get('context', ''),
        'response_A': hard_pair.get('response_A', ''),
        'response_B': hard_pair.get('response_B', ''),
        'preference': '',  # annotator fills in: A_much, A_slight, equal, B_slight, B_much
        'reason': '',      # optional: informative, accurate, on_topic, clear
        'tier': 1,
        'annotated': False,
    }
    for index, hard_pair in enumerate(tier1_hard_pairs)
]

print(f'Prepared {len(tier1_for_annotation)} pairs for annotation')

In [None]:
# CELL 6: SHOW SAMPLES
# Preview the first two records of each tier, truncating long text for display.
print('=' * 70, 'SAMPLE HARD PAIRS (Tier 1)', '=' * 70, sep='\n')

for number, sample in enumerate(tier1_for_annotation[:2], start=1):
    context_preview = sample['context'][:80]
    a_preview = sample['response_A'][:60]
    b_preview = sample['response_B'][:60]
    print(f"\n--- Hard Pair {number} ---")
    print(f"Context: {context_preview}...")
    print(f"A: {a_preview}...")
    print(f"B: {b_preview}...")
    print("[These are HARD - responses look similar]")

# Blank line, then the banner for the second section.
print('\n' + '=' * 70, 'SAMPLE OBVIOUS PAIRS (Tier 2)', '=' * 70, sep='\n')

for number, sample in enumerate(tier2_obvious_pairs[:2], start=1):
    good_preview = sample['response_A'][:60]
    bad_preview = sample['response_B'][:60]
    print(f"\n--- Obvious Pair {number} ({sample['violation_type']}) ---")
    print(f"A (good): {good_preview}...")
    print(f"B (bad):  {bad_preview}...")
    print("[OBVIOUS - A is clearly better]")
    print(f"Auto-preference: {sample['preference']}")

In [None]:
# CELL 7: SAVE OUTPUTS
print('=' * 70)
print('SAVING OUTPUTS')
print('=' * 70)

# Save Tier 1 for annotation.
# ensure_ascii=False writes non-ASCII text as-is instead of \uXXXX escapes.
tier1_path = OUTPUT_DIR / 'tier1_hard_pairs.json'
with open(tier1_path, 'w', encoding='utf-8') as f:
    json.dump(tier1_for_annotation, f, indent=2, ensure_ascii=False)
# The check mark was previously stored as mis-decoded bytes and printed as garbage.
print(f'✅ Saved {len(tier1_for_annotation)} hard pairs for annotation')

# Save Tier 2 (already labeled)
tier2_path = OUTPUT_DIR / 'tier2_obvious_pairs.json'
with open(tier2_path, 'w', encoding='utf-8') as f:
    json.dump(tier2_obvious_pairs, f, indent=2, ensure_ascii=False)
print(f'✅ Saved {len(tier2_obvious_pairs)} obvious pairs (auto-labeled)')

In [None]:
# CELL 8: SUMMARY
# Closing report: per-tier counts, the files written, and the manual follow-up steps.
# Symbols in these messages were previously stored as mis-decoded bytes; they are
# restored here to the intended characters.
print('\n' + '=' * 70)
print('3-TIER HYBRID DPO - TIER 1 & 2 COMPLETE')
print('=' * 70)

print('\n📊 SUMMARY:')
print(f'   Tier 1 (HARD pairs): {len(tier1_for_annotation)} → YOU annotate')
print(f'   Tier 2 (OBVIOUS pairs): {len(tier2_obvious_pairs)} → Auto-labeled ✅')
print('   Tier 3 (BOOTSTRAPPED): 500 → After your annotation')

print('\n📁 OUTPUT FILES:')
print('   tier1_hard_pairs.json - Download & annotate with HTML interface')
print('   tier2_obvious_pairs.json - Already labeled, ready for training')

print('\n📋 YOUR NEXT STEPS:')
print('   1. Download tier1_hard_pairs.json')
print('   2. Open annotation_interface.html')
print('   3. Load the file and annotate (~500 pairs, ~4 hours)')
print('   4. Save as tier1_annotated.json')
print('   5. Upload to Kaggle dataset')
print('   6. Run Phase 5 Training notebook')

print('\n⏱️ TIME ESTIMATE:')
print('   Annotation: ~30 seconds/pair × 500 = ~4 hours')
print('   Tip: Do 100 pairs/day over 5 days')

print('\n' + '=' * 70)