## Analyze result

In [None]:
import os

from utils import apply_diff, display_side_by_side_diff, extract_code_from_response, load_from_jsonl


def analyze_result(example, mode):
    """
    Print a human-readable report comparing one model result to its ground truth.

    The report consists of a header with the example's id and language, the
    outcome of applying the diff (find_replace mode only), whether the model's
    code equals the ground truth exactly, and a side-by-side diff when it
    does not.

    Args:
        example: One result example. Must provide "ground_truth" and
            "model_response"; "original_code" is also required in
            find_replace mode. "id" and "language" are optional and are
            shown as "unknown" when missing.
        mode: Either "find_replace" or "fully_rewrite". Any value other than
            "find_replace" is handled as "fully_rewrite".
    """
    separator = "=" * 60
    example_id = example.get("id", "unknown")
    language = example.get("language", "unknown")
    print(f"\n{separator}")
    print(f"Example ID: {example_id} | Language: {language}")
    print(separator)

    expected = example["ground_truth"]
    response = example["model_response"]

    if mode != "find_replace":
        # fully_rewrite: the response carries the complete rewritten code.
        produced = extract_code_from_response(response)
    else:
        # find_replace: the response is a diff to apply to the original code.
        applied, produced = apply_diff(example["original_code"], response)
        if not applied:
            print("❌ DIFF APPLICATION FAILED")
            return
        print("✅ Diff applied successfully")

    # Nothing to diff when the model reproduced the ground truth exactly.
    if produced == expected:
        print("Exact Match: ✅ YES")
        print("Perfect match! No differences to show.")
        return

    print("Exact Match: ❌ NO")
    display_side_by_side_diff(expected, produced)


# Example usage:
if __name__ == "__main__":
    import random

    # Load results
    res_file_name = "fully_rewrite_gpt-5_results.jsonl"
    # The evaluation mode is inferred from the results file name.
    mode = "find_replace" if "find_replace" in res_file_name else "fully_rewrite"
    file_path = os.path.join("./results", res_file_name)
    results = load_from_jsonl(file_path)

    def find_random_failed_case():
        """
        Pick a random failed result.

        A result counts as failed when its "label" value is falsy.

        Returns:
            The index into ``results`` of a uniformly chosen failed case, or
            None when ``results`` is empty or contains no failed case.
        """
        # Collect the candidates up front instead of re-drawing until a
        # failure is hit: that loop never terminates when every case passed,
        # and random.randint(0, -1) raises on an empty result list.
        failed_ids = [idx for idx, result in enumerate(results) if not result["label"]]
        if not failed_ids:
            return None
        return random.choice(failed_ids)

    random_id = find_random_failed_case()
    if random_id is None:
        print(f"No failed cases found in {file_path}")
    else:
        analyze_result(results[random_id], mode)


Example ID: 260 | Language: python
Exact Match: ❌ NO

SIDE-BY-SIDE COMPARISON
Ground Truth                                                                                                                | Model Output                                                                                                               
--------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------
from __future__ import annotations                                                                                          | from __future__ import annotations                                                                                         
                                                                                                                            |                                              

## Analyze checkpoint